/***************************************************************************
                          	    SeqCom

The DPstructure software, which includes SeqCom, is intended for the analysis
of intrinsically unstructured proteins and disordered protein regions.

SeqCom is a primitive sequence composition algorithm that uses statistical
data to predict the positions of binding sites in the amino acid sequences
of nucleic-acid-binding IUPs.
                             -------------------

(C) 2009 by Russell C. Goodman
**************************************************************************/
#include <iostream>
#include <vector>
#include <string>
#include <fstream>
using namespace std;

// Looks up the per-residue nucleic-acid statistic for a one-letter amino acid
// code passed as a string; the definition follows main().
double NucAcidData(string residue);

/**
 * SeqCom entry point.
 *
 * Usage: <program> <sequence-file> <output-file>
 *
 * Reads the sequence file, skips its first line, and splits the remaining
 * lines into single residues. Each residue is scored with NucAcidData();
 * residues scoring at least 0.7 are flagged as binding, and whenever two
 * flagged residues sit exactly three positions apart the two residues between
 * them are flagged as well. One line per residue is appended to the output
 * file: the residue followed by " 2 1 2 2" (binding) or " 2 0 2 2"
 * (non-binding).
 *
 * @param argc  Number of command-line arguments; at least 3 are required.
 * @param argv  argv[1] is the input sequence file, argv[2] the output file
 *              (opened in append mode).
 * @return 0 on success, 1 on bad usage or when a file cannot be opened.
 */
int main(int argc, char *argv[])
{
    if (argc < 3)
    {
        std::cerr << "Usage: " << (argc > 0 ? argv[0] : "SeqCom")
                  << " <sequence-file> <output-file>" << std::endl;
        return 1;
    }

    std::ifstream in_seq(argv[1]);
    if (!in_seq)
    {
        std::cerr << "SeqCom: cannot open input file '" << argv[1] << "'" << std::endl;
        return 1;
    }

    // Read until getline() fails. Testing eof() before reading would append a
    // spurious empty line and make the result depend on a trailing newline.
    std::vector<std::string> lines;
    for (std::string line; std::getline(in_seq, line); )
    {
        lines.push_back(line);
    }
    in_seq.close();

    // Flatten the sequence lines into one residue string.
    // The first line is skipped (presumably a FASTA-style header; confirm).
    // NOTE(review): the final character of every line is dropped, exactly as
    // the original code did. That is correct if the input uses CRLF line
    // endings (it removes the '\r'), but it discards a real residue on
    // LF-only files. Confirm the expected input format before changing it.
    std::string residues;
    for (std::size_t row = 1; row < lines.size(); ++row)
    {
        const std::string &text = lines[row];
        if (text.empty())
        {
            continue; // nothing to take; also avoids size() - 1 underflow
        }
        residues.append(text, 0, text.size() - 1);
    }

    const std::size_t count = residues.size();
    const double high_score_threshold = 0.7;

    // Mark residues whose statistic reaches the threshold. The vector is sized
    // to the sequence and fully initialised, so there is no length limit and
    // no read of indeterminate values.
    std::vector<bool> high(count, false);
    for (std::size_t pos = 0; pos < count; ++pos)
    {
        high[pos] = NucAcidData(std::string(1, residues[pos])) >= high_score_threshold;
    }

    // Analysis of probable binding sites.
    // Allows for flexibility in the traditional 4 amino acid alpha helix: when
    // positions i and i+3 are both high, the two residues in between are
    // treated as binding too. The bound keeps i+3 inside the sequence.
    std::vector<bool> binding(high);
    for (std::size_t pos = 0; pos + 3 < count; ++pos)
    {
        if (high[pos] && high[pos + 3])
        {
            binding[pos + 1] = true;
            binding[pos + 2] = true;
        }
    }

    std::ofstream out_st(argv[2], std::ofstream::app);
    if (!out_st)
    {
        std::cerr << "SeqCom: cannot open output file '" << argv[2] << "'" << std::endl;
        return 1;
    }

    for (std::size_t pos = 0; pos < count; ++pos)
    {
        out_st << residues[pos] << (binding[pos] ? " 2 1 2 2" : " 2 0 2 2") << '\n';
    }
    out_st.close();

    return 0;
}


/**
 * Looks up the nucleic-acid statistic tabulated for a single amino acid.
 *
 * @param residue  One-letter, upper-case amino acid code held in a string
 *                 (for example "K").
 * @return The tabulated value for the residue, or 0.0 when `residue` is not
 *         exactly one of the twenty recognised one-letter codes (empty
 *         string, lower case, multi-character text, stray '\r', "X", ...).
 *
 * The original implementation flowed off the end of the function for
 * unrecognised input, which is undefined behaviour for a function returning
 * a value; the explicit 0.0 fallback makes the result well defined.
 */
double NucAcidData(std::string residue)
{
    if (residue.size() != 1)
    {
        return 0.0;
    }

    // Nucleic Acid Data
    switch (residue[0])
    {
        case 'A': return 0.7;
        case 'G': return 1.3;
        case 'V': return 0.3;
        case 'L': return 0.1;
        case 'I': return 0.1;
        case 'M': return 0.0;
        case 'P': return 0.4;
        case 'S': return 0.7;
        case 'T': return 0.6;
        case 'C': return 0.1;
        case 'N': return 0.7;
        case 'Q': return 0.6;
        case 'D': return 0.4;
        case 'E': return 1.6;
        case 'K': return 1.9;
        case 'R': return 2.1;
        case 'H': return 0.1;
        case 'Y': return 0.4;
        case 'F': return 0.1;
        case 'W': return 0.1;
        default:  return 0.0; // unknown residue: no tabulated statistic
    }
}
